package tech.abing.spider.task.pageprocessor.amac;

import java.sql.Timestamp;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Pattern;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import tech.abing.spider.component.PageProcessor;
import tech.abing.spider.component.PageScript;
import tech.abing.spider.model.Proccessable;
import tech.abing.spider.page.OkPage;
import tech.abing.spider.page.Page;
import tech.abing.spider.request.BasicRequest;
import tech.abing.spider.request.PageRequest;
import tech.abing.spider.request.StartContext;
import tech.abing.spider.task.model.amac.DisciplinaryActionEntity;
import tech.abing.spider.task.utils.TaskUtil;
import tech.abing.spider.util.DateUtils;

/**
 * Parses the disciplinary-action listing pages.
 * <p>
 * On a listing page whose URL does not contain {@code "index_"}, the total page count is read
 * from the pager and one request per remaining listing page is queued. On every listing page,
 * each entry is turned into a {@link DisciplinaryActionEntity} (title, publish date, creation
 * time) and a request for its detail page is queued for {@link DisciplinaryContentProcessor},
 * carrying the entity under the ext-param key {@code "da"}.
 *
 * @author Ivan qb.huang@wescxx.com
 * @date 2016-11-3 09:44:07 AM
 * @version V2.0
 */
public class DisciplinaryActionProcessor implements PageProcessor {

	/** Fragment found only in the URLs of follow-up listing pages ({@code index_N.shtml}). */
	private static final String PAGED_URL_MARKER = "index_";

	/**
	 * @return always {@code null}
	 */
	@Override
	public PageScript getJavaScript() {
		return null;
	}

	/**
	 * @return always {@code null}
	 */
	@Override
	public Pattern getNormalContain() {
		return null;
	}

	/**
	 * Processes one listing page: queues the remaining listing pages (first page only) and a
	 * detail-page request for every entry found on the page.
	 *
	 * @param page            the fetched listing page
	 * @param context         factory used to create the follow-up requests
	 * @param queue           receives every follow-up request created here
	 * @param objectContainer not used by this processor
	 * @throws Exception if a publish date cannot be parsed or a request cannot be created
	 */
	@Override
	public void process(OkPage page, StartContext context, List<BasicRequest> queue,
			List<Proccessable> objectContainer) throws Exception {

		Document doc = Jsoup.parse(page.getContent());
		PageRequest request = page.getRequest();
		String pageUrl = request.getUrl();

		// Only the first listing page fans out to the others; paged URLs carry the marker.
		if (!pageUrl.contains(PAGED_URL_MARKER)) {
			queueRemainingListPages(doc, request, context, queue);
		}

		queueDetailRequests(doc, pageUrl, context, queue);
	}

	/**
	 * Queues one request per listing page after the first, reusing the headers of the current
	 * request. Page {@code n} (n &gt;= 2) is addressed as {@code index_(n-1).shtml}.
	 */
	private void queueRemainingListPages(Document doc, PageRequest request, StartContext context,
			List<BasicRequest> queue) throws Exception {

		int totalPages = parseTotalPages(doc.select("table.noBorder tr > td").text());

		// Build from the listing directory (everything up to and including the last '/'), so a
		// start URL ending in a file name such as ".../index.shtml" still yields a valid URL.
		// For a start URL that already ends in '/', this is the URL itself.
		String pageUrl = request.getUrl();
		String listDir = pageUrl.substring(0, pageUrl.lastIndexOf("/") + 1);

		for (int pageNo = 2; pageNo <= totalPages; pageNo++) {
			String url = listDir + PAGED_URL_MARKER + (pageNo - 1) + ".shtml";
			PageRequest req = context.createPageRequest(url, DisciplinaryActionProcessor.class);
			req.setHeaders(request.getHeaders());
			queue.add(req);
		}
	}

	/**
	 * Extracts the total page count from the pager text, e.g. {@code "上一页 1 下一页   第 /1页  跳转"}:
	 * the value between the first {@code '/'} and the last {@code '页'}.
	 *
	 * @return the parsed page count, or {@code 1} when the pager text does not have that shape
	 *         (so the current page is still processed instead of failing on a bad index)
	 */
	private static int parseTotalPages(String pagerText) throws Exception {
		int slash = pagerText.indexOf("/");
		int pageChar = pagerText.lastIndexOf("页");
		if (slash < 0 || pageChar < slash + 1) {
			return 1;
		}
		return TaskUtil.str2Int(pagerText.substring(slash + 1, pageChar));
	}

	/**
	 * Creates a {@link DisciplinaryActionEntity} for every entry on the page and queues a request
	 * for its detail page with the entity attached under the ext-param key {@code "da"}.
	 * Entries without a link are skipped because there is no detail page to request.
	 */
	private void queueDetailRequests(Document doc, String pageUrl, StartContext context,
			List<BasicRequest> queue) throws Exception {

		// Directory of the listing page without the trailing '/'.
		int cut = pageUrl.lastIndexOf("/");
		String listDir = cut < 0 ? pageUrl : pageUrl.substring(0, cut);

		// NOTE(review): because of the comma, "div.newsList2" matches anywhere in the document,
		// not only directly under div.iRight - confirm this is intended.
		Elements entries = doc.select("div.iRight > div.newsList1,div.newsList2");

		for (Element entry : entries) {
			Elements link = entry.select("div.newsName > a");
			String href = link.attr("href");
			if (href.isEmpty()) {
				continue;
			}

			DisciplinaryActionEntity da = new DisciplinaryActionEntity();
			da.setCreateTime(new Timestamp(System.currentTimeMillis()));
			da.setTitle(link.text());
			da.setPublishDate(DateUtils.parseDate(entry.select("div.newsDate").text(), new String[]{"yyyy-MM-dd"}));

			// Only the last path segment of the href is kept and appended to the listing
			// directory, e.g. "../../xxgs/jlcf/387253.shtml"
			//   ==> "http://www.amac.org.cn/xxgs/jlcf/387253.shtml"
			// An href without any '/' is treated as a bare file name.
			int lastSlash = href.lastIndexOf("/");
			String fileName = lastSlash < 0 ? "/" + href : href.substring(lastSlash);

			PageRequest req = context.createPageRequest(listDir + fileName, DisciplinaryContentProcessor.class);

			Map<String, Object> extParams = new HashMap<String, Object>();
			extParams.put("da", da);
			req.setExtParams(extParams);

			queue.add(req);
		}
	}

	/**
	 * Not implemented: this method has an empty body, so error pages are ignored.
	 */
	@Override
	public void processErrorPage(Page page, StartContext context,
			List<Proccessable> objectContainer) throws Exception {
		// TODO: no error-page handling yet.
	}

}
